
import os
import sys
import pandas as pd

# Project root: two directory levels above the directory containing this file.
BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../"))
if BASE_DIR not in sys.path:
    sys.path.insert(0, BASE_DIR)

# Input locations: the table that still has gaps, and the directory holding
# one "<indicator>_filled.csv" file per indicator.
main_file = os.path.join(BASE_DIR, "data", "2_missing_value_analysis", "filled", "needFill_indicators.csv")
filled_dir = os.path.join(BASE_DIR, "data", "2_missing_value_analysis", "filled", "filled_result")

# Columns used as the join key between the main table and every fill file.
KEY_COLUMNS = ['序号', '国名Ch', 'Year']
# File-name suffix that marks a per-indicator fill file.
FILLED_SUFFIX = "_filled.csv"


def _merge_filled_indicator(df_main, df_fill, indicator):
    """Fill the NaN cells of one indicator column from a fill table.

    Rows are matched on ``KEY_COLUMNS``. Only cells that are NaN in
    ``df_main[indicator]`` are replaced; existing values are kept.

    Args:
        df_main: Table being filled; not modified in place.
        df_fill: Table read from the indicator's fill file.
        indicator: Name of the column to fill.

    Returns:
        A ``(frame, merged)`` tuple. ``frame`` is the filled table, or
        ``df_main`` itself when the indicator was skipped; ``merged`` is
        True only when the fill was actually applied.
    """
    required = KEY_COLUMNS + [indicator]
    missing = [col for col in required if col not in df_fill.columns]
    if missing:
        print(f"⚠️ 跳过 {indicator}：填充文件缺少列 {missing}")
        return df_main, False

    # Without this guard the merge would simply append the column and the
    # subsequent fillna lookup of "<indicator>_fill" would raise KeyError.
    if indicator not in df_main.columns:
        print(f"⚠️ 跳过 {indicator}：主表中不存在该指标列")
        return df_main, False

    # Rename up front so the merge never depends on suffix handling.
    fill_col = f"{indicator}_fill"
    fill_values = df_fill[required].rename(columns={indicator: fill_col})

    # Duplicate keys on the right side of a left merge would multiply rows
    # of the main table; keep the first occurrence of each key instead.
    dup_mask = fill_values.duplicated(subset=KEY_COLUMNS)
    if dup_mask.any():
        print(f"⚠️ {indicator} 填充文件存在 {int(dup_mask.sum())} 行重复键，仅保留首次出现的行")
        fill_values = fill_values[~dup_mask]

    merged = df_main.merge(fill_values, on=KEY_COLUMNS, how='left')
    merged[indicator] = merged[indicator].fillna(merged[fill_col])
    return merged.drop(columns=[fill_col]), True


# Load the table whose missing values are to be filled.
df_main = pd.read_csv(main_file, encoding="utf-8-sig")

# Collect every *_filled.csv file; sorted so the processing order (and the
# log output) is deterministic, since os.listdir order is arbitrary.
filled_files = sorted(f for f in os.listdir(filled_dir) if f.endswith(FILLED_SUFFIX))
# Strip only the trailing suffix (str.replace would remove every occurrence).
indicators = [f[:-len(FILLED_SUFFIX)] for f in filled_files]

# Merge the fill values of each indicator into the main table.
for indicator in indicators:
    filled_path = os.path.join(filled_dir, f"{indicator}{FILLED_SUFFIX}")
    # Skip entries that are not regular files (e.g. a directory whose name
    # happens to end with the suffix, or a file removed after listing).
    if not os.path.isfile(filled_path):
        print(f"ℹ️ 跳过未找到的指标文件：{indicator}_filled.csv")
        continue

    df_fill = pd.read_csv(filled_path, encoding="utf-8-sig")
    df_main, merged_ok = _merge_filled_indicator(df_main, df_fill, indicator)
    if merged_ok:
        print(f"✅ {indicator} 指标填充值合并完成")

# Save the final filled table.
output_path = os.path.join(BASE_DIR, "data", "2_missing_value_analysis", "filled", "needFill_indicators_filled.csv")
df_main.to_csv(output_path, index=False, encoding="utf-8-sig")
print(f"🎉 已保存最终填充结果：{output_path}")
